Clustering ASVs

Code
library(dplyr) ; library(tidyr) ; library(ggplot2)
# library(doParallel) ; library(foreach) ; library(doSNOW)

root <- rprojroot::has_file(".git/index")
datadir = root$find_file("data")
funsdir = root$find_file("functions")
savingdir = root$find_file("saved_files")
savingdirOceanSlices = root$find_file("saved_files_oceanSlices")
savingdirBinary = root$find_file("saved_files_binary") 

datapath = root$find_file(paste0(datadir,'/','grump.phaeocystis_asv_long.csv'))
files_vec <- list.files(funsdir)

for( i in 1:length(files_vec)){
  source(root$find_file(paste0(funsdir,'/',files_vec[i])))
}

dframe = data.table::fread(input = datapath) %>% filter(Cruise!="MOSAiC",Raw.Sequence.Counts>0)
dframe_allASVs = tidy_grump(Dframe = dframe,binnary = T)

useStoredFiles = F

The idea is to create a ‘custom’ distance matrix, to induce the grouppings.

  • \(c_1\) is within the same species - but no unassigned
  • \(c_2\) is between assigned species
  • \(c_3\) is between species and unassigned
  • \(c_4\) is within unassigned and unassigned

For this document we will use only the following configuration:

\(c_1=1\), \(c_2=1000\), \(c_3=10\) , \(c_4=10\)

A priori, the ‘probability’ of unassigned ASVs to agglomerate between themselves is the same as if it was to agglomerate with other species. Large \(c_2\) makes it harder for known species ASVs to group between themselves.

Code
inducedDist_ = inducedDist(
  dFrame = dframe_allASVs$dframe,
  c1 = 1,c2=1000,c3=10,c4=10,
  compMatrix = dframe_allASVs$ASV_composition %>% data.frame())

If we exclude the ASVs that appears only one ~or two~ times, does it help?

Code
nASVs <- dframe_allASVs$dframe %>% select(ASV_name) %>% distinct() %>% nrow()

plot0s_summ = dframe_allASVs$dframe %>% group_by(ASV_name,SampleKey) %>% 
  summarise(Freq=n()) %>%
  mutate(nSamplesObserved = sum(Freq)) %>% 
  arrange(nSamplesObserved) %>% 
  select(ASV_name,nSamplesObserved) %>% distinct() %>%
  mutate(FreqNSamples=cut(nSamplesObserved,breaks = c(0,1,2,3,5,10,25,50,100,250,1500),right = T,include.lowest = T)) %>% 
  group_by(FreqNSamples) %>% 
  summarise(Freq=n(),Pct=n()/nASVs) %>% 
  mutate(CummPct = cumsum(Pct))

### Creating vectors with id of ASVs to remove:
ASVs2Remove_df = dframe_allASVs$dframe %>% group_by(ASV_name,SampleKey) %>% 
  summarise(Freq=n()) %>%
  mutate(nSamplesObserved = sum(Freq)) %>% 
  arrange(nSamplesObserved) %>% 
  select(ASV_name,nSamplesObserved) %>% distinct() 

vetASVs_2orMore = ASVs2Remove_df %>% filter(nSamplesObserved<2) %>% select(ASV_name) %>% pull()
vetASVs_3orMore = ASVs2Remove_df %>% filter(nSamplesObserved<3) %>% select(ASV_name) %>% pull()

saveRDS(vetASVs_2orMore,file = paste0(savingdirOceanSlices,'/','asvs_to_remove_oneSample'))
saveRDS(vetASVs_3orMore,file = paste0(savingdirOceanSlices,'/','asvs_to_remove_twoSample'))


nsamples = dframe_allASVs$dframe %>% select(SampleID) %>% distinct() %>% nrow()
plot0s_summ2 = dframe_allASVs$dframe %>% 
  group_by(SampleKey) %>% 
  summarise(NumbersOfAsvInEachSample=n()) %>% 
  arrange(NumbersOfAsvInEachSample) %>% 
  mutate(FreqAsvs=cut(NumbersOfAsvInEachSample,breaks = c(0,1,2,3,5,10,25,50,100,250,500),
                      right = T,include.lowest = T)) %>% 
  group_by(FreqAsvs) %>% 
  summarise(Freq=n(),Pct=n()/nsamples) %>% 
  mutate(CummPct = cumsum(Pct))

plot0s_summ %>% knitr::kable() %>% kableExtra::kable_styling(full_width = FALSE, position = "left")
plot0s_summ2 %>% knitr::kable() %>% kableExtra::kable_styling(full_width = FALSE, position = "float_left")
FreqNSamples Freq Pct CummPct
[0,1] 715 0.4824561 0.4824561
(1,2] 169 0.1140351 0.5964912
(2,3] 80 0.0539811 0.6504723
(3,5] 103 0.0695007 0.7199730
(5,10] 121 0.0816464 0.8016194
(10,25] 129 0.0870445 0.8886640
(25,50] 67 0.0452092 0.9338731
(50,100] 48 0.0323887 0.9662618
(100,250] 41 0.0276653 0.9939271
(250,1.5e+03] 9 0.0060729 1.0000000
FreqAsvs Freq Pct CummPct
[0,1] 90 0.0911854 0.0911854
(1,2] 58 0.0587639 0.1499493
(2,3] 36 0.0364742 0.1864235
(3,5] 62 0.0628166 0.2492401
(5,10] 161 0.1631206 0.4123607
(10,25] 243 0.2462006 0.6585613
(25,50] 253 0.2563323 0.9148936
(50,100] 82 0.0830800 0.9979737
(100,250] 2 0.0020263 1.0000000

Lets now compare the distance matrix using mds, ltns, and umap.

Code
if(!useStoredFiles){
  aDist = vegan::vegdist(x = dframe_allASVs$ASV_composition %>% select(-name) %>% data.frame(),method = 'aitchison')
  coord_plots_all <- distMatrix_dimReduc_coords(distMatrix = as.matrix(aDist))
  saveRDS(coord_plots_all,file = paste0(savingdir,'/','coord_plots_all'))
}
coord_plots_all  = readRDS(file = paste0(savingdir,'/','coord_plots_all'))

dim_reduce_obj_all <- plot_dimReduc_coords(coord_plots_all)

dim_reduce_obj_all$MDS_2d
dim_reduce_obj_all$nMDS_2d
dim_reduce_obj_all$TSNE_2d
dim_reduce_obj_all$umap_2d
Run 0 stress 0.06468084 
Run 1 stress 0.07480603 
Run 2 stress 0.0730409 
Run 3 stress 0.07345601 
Run 4 stress 0.07522215 
Run 5 stress 0.07467133 
Run 6 stress 0.07474017 
Run 7 stress 0.07560192 
Run 8 stress 0.07449577 
Run 9 stress 0.07404174 
Run 10 stress 0.07397977 
Run 11 stress 0.07262368 
Run 12 stress 0.07255114 
Run 13 stress 0.07532366 
Run 14 stress 0.07304862 
Run 15 stress 0.07680197 
Run 16 stress 0.07180818 
Run 17 stress 0.07454453 
Run 18 stress 0.07792136 
Run 19 stress 0.07446214 
Run 20 stress 0.07575259 
*** Best solution was not repeated -- monoMDS stopping criteria:
    20: no. of iterations >= maxit

Code
dframe2plus = tidy_grump(Dframe = dframe,vet_ASVs2remove = vetASVs_2orMore)

if(!useStoredFiles){
  aDist = vegan::vegdist(x = dframe2plus$ASV_composition %>% select(-name) %>% data.frame(),method = 'aitchison')
  coord_plots_2ormore <- distMatrix_dimReduc_coords(distMatrix = as.matrix(aDist))
  saveRDS(coord_plots_2ormore,file = paste0(savingdir,'/','coord_plots_2ormore'))
}
coord_plots_2ormore  = readRDS(file = paste0(savingdir,'/','coord_plots_2ormore'))

dim_reduce_obj_2ormore <- plot_dimReduc_coords(coord_plots_2ormore)
dim_reduce_obj_2ormore$MDS_2d
dim_reduce_obj_2ormore$nMDS_2d
dim_reduce_obj_2ormore$TSNE_2d
dim_reduce_obj_2ormore$umap_2d
Run 0 stress 0.08168081 
Run 1 stress 0.08429471 
Run 2 stress 0.08720077 
Run 3 stress 0.08838941 
Run 4 stress 0.08728998 
Run 5 stress 0.08835142 
Run 6 stress 0.08889119 
Run 7 stress 0.08553928 
Run 8 stress 0.08729163 
Run 9 stress 0.08936481 
Run 10 stress 0.0900273 
Run 11 stress 0.08680416 
Run 12 stress 0.0872725 
Run 13 stress 0.0857184 
Run 14 stress 0.083441 
Run 15 stress 0.0866785 
Run 16 stress 0.08572318 
Run 17 stress 0.08578093 
Run 18 stress 0.08780596 
Run 19 stress 0.08687016 
Run 20 stress 0.085108 
*** Best solution was not repeated -- monoMDS stopping criteria:
    20: no. of iterations >= maxit

Code
dim(dframe2plus$ASV_composition)
[1] 767 975

We have 767 ASVs and 975 samples

Code
dframe3plus = tidy_grump(Dframe = dframe,vet_ASVs2remove = vetASVs_3orMore)

if(!useStoredFiles){
  aDist = vegan::vegdist(x = dframe3plus$ASV_composition %>% select(-name) %>% data.frame(),method = 'aitchison')
  coord_plots_3ormore <- distMatrix_dimReduc_coords(distMatrix = as.matrix(aDist),perplexityTsne = 150)
  saveRDS(coord_plots_3ormore,file = paste0(savingdir,'/','coord_plots_3ormore'))
}
coord_plots_3ormore  = readRDS(file = paste0(savingdir,'/','coord_plots_3ormore'))

dim_reduce_obj_3ormore <- plot_dimReduc_coords(coord_plots_3ormore)

dim_reduce_obj_3ormore$MDS_2d
dim_reduce_obj_3ormore$nMDS_2d
dim_reduce_obj_3ormore$TSNE_2d
dim_reduce_obj_3ormore$umap_2d
Run 0 stress 0.08718612 
Run 1 stress 0.09025029 
Run 2 stress 0.08938301 
Run 3 stress 0.09251931 
Run 4 stress 0.09070787 
Run 5 stress 0.09289893 
Run 6 stress 0.09116608 
Run 7 stress 0.0906269 
Run 8 stress 0.08901612 
Run 9 stress 0.09330492 
Run 10 stress 0.09101909 
Run 11 stress 0.094913 
Run 12 stress 0.08973203 
Run 13 stress 0.09116532 
Run 14 stress 0.09227395 
Run 15 stress 0.09204363 
Run 16 stress 0.09165805 
Run 17 stress 0.09108332 
Run 18 stress 0.09168284 
Run 19 stress 0.09580641 
Run 20 stress 0.09207231 
*** Best solution was not repeated -- monoMDS stopping criteria:
    20: no. of iterations >= maxit

Code
dim(dframe3plus$ASV_composition)
[1] 598 971

598 ASVs and 970 samples.

It feels that removing rare ASVs won’t help..

Code
if(!useStoredFiles){
  hammDistMatrix = hammingDist(X = dframe_allASVs$binnary_ASV_df %>% select(-ASV_name) %>%  as.matrix())
  coord_plots_all_bin <- distMatrix_dimReduc_coords(distMatrix = as.matrix(hammDistMatrix),perplexityTsne = 150)
  saveRDS(coord_plots_all_bin,file = paste0(savingdir,'/','coord_plots_all_bin'))
}
coord_plots_all_bin  = readRDS(file = paste0(savingdir,'/','coord_plots_all_bin'))

coord_plots_all_bin_plots <- plot_dimReduc_coords(coord_plots_all_bin)

coord_plots_all_bin_plots$MDS_2d
coord_plots_all_bin_plots$nMDS_2d
coord_plots_all_bin_plots$TSNE_2d
coord_plots_all_bin_plots$umap_2d
Run 0 stress 0.04026351 
Run 1 stress 0.04973186 
Run 2 stress 0.05058196 
Run 3 stress 0.05179752 
Run 4 stress 0.04693137 
Run 5 stress 0.04758069 
Run 6 stress 0.0511412 
Run 7 stress 0.04596287 
Run 8 stress 0.05277352 
Run 9 stress 0.04661559 
Run 10 stress 0.04677193 
Run 11 stress 0.05037888 
Run 12 stress 0.04614352 
Run 13 stress 0.05694953 
Run 14 stress 0.05074769 
Run 15 stress 0.04656415 
Run 16 stress 0.04582944 
Run 17 stress 0.05573858 
Run 18 stress 0.05206533 
Run 19 stress 0.05362203 
Run 20 stress 0.05641374 
*** Best solution was not repeated -- monoMDS stopping criteria:
    20: no. of iterations >= maxit

Code
dframe2plus = tidy_grump(Dframe = dframe,vet_ASVs2remove = vetASVs_2orMore,binnary = T)

if(!useStoredFiles){
  hammDistMatrix = hammingDist(X = dframe2plus$binnary_ASV_df %>% select(-ASV_name) %>%  as.matrix())
  coord_plots_2ormore_bin <- distMatrix_dimReduc_coords(distMatrix = as.matrix(hammDistMatrix),perplexityTsne = 150)
  saveRDS(coord_plots_2ormore_bin,file = paste0(savingdir,'/','coord_plots_2ormore_bin'))
}
coord_plots_2ormore_bin  = readRDS(file = paste0(savingdir,'/','coord_plots_2ormore_bin'))

coord_plots_2ormore_bin_plots <- plot_dimReduc_coords(coord_plots_2ormore_bin)

coord_plots_2ormore_bin_plots$MDS_2d
coord_plots_2ormore_bin_plots$nMDS_2d
coord_plots_2ormore_bin_plots$TSNE_2d
coord_plots_2ormore_bin_plots$umap_2d
Run 0 stress 0.05200548 
Run 1 stress 0.05383954 
Run 2 stress 0.05610581 
Run 3 stress 0.05604351 
Run 4 stress 0.05521967 
Run 5 stress 0.0580235 
Run 6 stress 0.05492497 
Run 7 stress 0.05614902 
Run 8 stress 0.05483286 
Run 9 stress 0.05437525 
Run 10 stress 0.05562016 
Run 11 stress 0.05585861 
Run 12 stress 0.05535613 
Run 13 stress 0.05544325 
Run 14 stress 0.05372001 
Run 15 stress 0.05466198 
Run 16 stress 0.05531106 
Run 17 stress 0.05431961 
Run 18 stress 0.0560297 
Run 19 stress 0.05508792 
Run 20 stress 0.05795377 
*** Best solution was not repeated -- monoMDS stopping criteria:
    20: no. of iterations >= maxit

We have 767 ASVs and 975 samples

Code
dframe3plus = tidy_grump(Dframe = dframe,vet_ASVs2remove = vetASVs_3orMore, binnary = T)

if(!useStoredFiles){
  hammDistMatrix = hammingDist(X = dframe3plus$binnary_ASV_df %>% select(-ASV_name) %>%  as.matrix())
  coord_plots_3ormore_bin <- distMatrix_dimReduc_coords(distMatrix = as.matrix(hammDistMatrix),perplexityTsne = 150)
  saveRDS(coord_plots_3ormore_bin,file = paste0(savingdir,'/','coord_plots_3ormore_bin'))
}
coord_plots_3ormore_bin  = readRDS(file = paste0(savingdir,'/','coord_plots_2ormore_bin'))

coord_plots_3ormore_bin_plots <- plot_dimReduc_coords(coord_plots_2ormore_bin)

coord_plots_3ormore_bin_plots$MDS_2d
coord_plots_3ormore_bin_plots$nMDS_2d
coord_plots_3ormore_bin_plots$TSNE_2d
coord_plots_3ormore_bin_plots$umap_2d
Run 0 stress 0.05611373 
Run 1 stress 0.05740888 
Run 2 stress 0.05707433 
Run 3 stress 0.05944355 
Run 4 stress 0.05722508 
Run 5 stress 0.05829266 
Run 6 stress 0.05845288 
Run 7 stress 0.05789314 
Run 8 stress 0.06205236 
Run 9 stress 0.05767133 
Run 10 stress 0.06056325 
Run 11 stress 0.06041551 
Run 12 stress 0.05831889 
Run 13 stress 0.05920586 
Run 14 stress 0.05838745 
Run 15 stress 0.05855115 
Run 16 stress 0.05905985 
Run 17 stress 0.05906832 
Run 18 stress 0.05790463 
Run 19 stress 0.05859275 
Run 20 stress 0.06104219 
*** Best solution was not repeated -- monoMDS stopping criteria:
    20: no. of iterations >= maxit

598 ASVs and 971 samples.

Can we create a mixture of those tow distances and se if a pattern emerge?

Code
if(!useStoredFiles){
  hammDistMatrix = hammingDist(X = dframe_allASVs$binnary_ASV_df %>% select(-ASV_name) %>%  as.matrix())
  aitDistMatrix = robCompositions::aDist(x = dframe_allASVs$ASV_composition %>% select(-name) %>% data.frame())
  
  fifty_fifty_matrix = 0.5 * normalizeDistMatrix(hammDistMatrix) + 0.5 * normalizeDistMatrix(aitDistMatrix)
  coord_plots_mixed_All <- distMatrix_dimReduc_coords(distMatrix = as.matrix(hammDistMatrix),perplexityTsne = 150)
  saveRDS(coord_plots_mixed_All,file = paste0(savingdir,'/','coord_plots_mixed_All'))
}
coord_plots_mixed_All  = readRDS(paste0(savingdir,'/','coord_plots_mixed_All'))

coord_plots_mixed_All_plots <- plot_dimReduc_coords(coord_plots_2ormore_bin)

coord_plots_mixed_All_plots$MDS_2d
coord_plots_mixed_All_plots$nMDS_2d
coord_plots_mixed_All_plots$TSNE_2d
coord_plots_mixed_All_plots$umap_2d
Run 0 stress 0.04026351 
Run 1 stress 0.04973186 
Run 2 stress 0.05058196 
Run 3 stress 0.05179752 
Run 4 stress 0.04693137 
Run 5 stress 0.04758069 
Run 6 stress 0.0511412 
Run 7 stress 0.04596287 
Run 8 stress 0.05277352 
Run 9 stress 0.04661559 
Run 10 stress 0.04677193 
Run 11 stress 0.05037888 
Run 12 stress 0.04614352 
Run 13 stress 0.05694953 
Run 14 stress 0.05074769 
Run 15 stress 0.04656415 
Run 16 stress 0.04582944 
Run 17 stress 0.05573858 
Run 18 stress 0.05206533 
Run 19 stress 0.05362203 
Run 20 stress 0.05641374 
*** Best solution was not repeated -- monoMDS stopping criteria:
    20: no. of iterations >= maxit

Haven’t compared it yet, but could be and Idea…

Here the idea is to aggregate some samples. So we would have compositions of ASVs but instead of samples we would have a less sparse and with less dimensions. The most natural could be:

  1. Compositions of ASVs, under longhurst provinces
  2. Compositions of ASVs, under longhurst provinces combined with different depths
  3. Cruises?
  4. Chunks of latitudes, and chunks of depths.

So for each aggregation we should take a look at mds, tsne, umap of the ait distances (or distance matrix using the CLR transformation)

So first lets do a bit of EDA to see what we can expect.

Code
dframe_allASVs$dframe %>% select(SampleKey,Depth,Longhurst_Short) %>%  distinct() %>% 
  ggplot(aes(Depth))+geom_histogram()
dframe_allASVs$dframe %>% select(SampleKey,Depth,Longhurst_Short) %>%  distinct() %>% 
  ggplot(aes(Depth))+geom_boxplot()

Let’s slice the dephts of the ocean in 10%.

Code
distinct_depth <- dframe_allASVs$dframe %>% select(SampleKey,Depth) %>% distinct()
qtls_0.1 = quantile(distinct_depth$Depth, seq(0,1,0.1))
qtlsNames = 1:(length(qtls_0.1)-1)
qtlsNames = ifelse(qtlsNames<10,paste0('DGR0',qtlsNames),paste0('DGR',qtlsNames))
qtls_0.1
        0%        10%        20%        30%        40%        50%        60% 
   0.00000    1.18800   15.00000   28.80741   45.00000   61.50000   81.29718 
       70%        80%        90%       100% 
 105.47600  150.64307  335.50200 2568.80000 

Here are the depth quantiles.

Code
distinct_depth = distinct_depth %>% 
  mutate(CatDepth=cut(Depth,breaks = qtls_0.1,include.lowest = T,right = T,labels = qtlsNames))
        
distinct_depth %>% group_by(CatDepth) %>% 
  summarise(Freq=n())
# A tibble: 10 × 2
   CatDepth  Freq
   <fct>    <int>
 1 DGR01       99
 2 DGR02      115
 3 DGR03       82
 4 DGR04      112
 5 DGR05       86
 6 DGR06       98
 7 DGR07       99
 8 DGR08       98
 9 DGR09       99
10 DGR10       99
Code
dframe_allASVs$dframe %>% select(SampleKey,Depth,Longhurst_Short) %>% distinct() %>% 
  mutate(CatDepth=cut(Depth,breaks = qtls_0.1,include.lowest = T,right = T,labels = qtlsNames)) %>% 
  ggplot(aes(CatDepth))+geom_bar()+
  facet_wrap(~Longhurst_Short,scales='free_y')

This is the distribution of number of samples for each GDR (group depth rank for each longhurst province.

So our first tentative will be to create compositions of asvs with this amount of combinations (GDRxLH)

Code
Lat_Depth_Composition_df=readRDS(file = paste0(savingdir,'/','Lat_Depth_Composition_df'))

if(!useStoredFiles){
  #hammDistMatrix = hammingDist(X = dframe_allASVs$binnary_ASV_df %>% select(-ASV_name) %>%  as.matrix())
  aitDistMatrix = robCompositions::aDist(x = Lat_Depth_Composition_df %>% select(-name) %>% data.frame())
  Lat_Depth_ait_all_coord <- distMatrix_dimReduc_coords(distMatrix = as.matrix(aitDistMatrix),perplexityTsne = 150)
  saveRDS(Lat_Depth_ait_all_coord,file = paste0(savingdir,'/','Lat_Depth_ait_all_coord'))
}
Run 0 stress 0.08699511 
Run 1 stress 0.09028113 
Run 2 stress 0.09059218 
Run 3 stress 0.08996065 
Run 4 stress 0.09032197 
Run 5 stress 0.090564 
Run 6 stress 0.09053852 
Run 7 stress 0.09461317 
Run 8 stress 0.09119436 
Run 9 stress 0.09347122 
Run 10 stress 0.09054701 
Run 11 stress 0.09077451 
Run 12 stress 0.09002654 
Run 13 stress 0.08790095 
Run 14 stress 0.09047252 
Run 15 stress 0.08853844 
Run 16 stress 0.08927363 
Run 17 stress 0.09012554 
Run 18 stress 0.09072616 
Run 19 stress 0.09376992 
Run 20 stress 0.09012315 
*** Best solution was not repeated -- monoMDS stopping criteria:
    20: no. of iterations >= maxit
Code
Lat_Depth_ait_all_coord  = readRDS(file = paste0(savingdir,'/','Lat_Depth_ait_all_coord'))

Lat_Depth_ait_all_coord_plots <- plot_dimReduc_coords(Lat_Depth_ait_all_coord)

Lat_Depth_ait_all_coord_plots$MDS_2d

Code
Lat_Depth_ait_all_coord_plots$nMDS_2d

Code
Lat_Depth_ait_all_coord_plots$TSNE_2d

Code
Lat_Depth_ait_all_coord_plots$umap_2d

Code
LH_Depth_Composition_df=readRDS(file = paste0(savingdir,'/','LH_Depth_Composition_df'))

if(!useStoredFiles){
  #hammDistMatrix = hammingDist(X = dframe_allASVs$binnary_ASV_df %>% select(-ASV_name) %>%  as.matrix())
  aitDistMatrix = robCompositions::aDist(x = LH_Depth_Composition_df %>% select(-name) %>% data.frame())
  LH_Depth_ait_all_coord <- distMatrix_dimReduc_coords(distMatrix = as.matrix(aitDistMatrix),perplexityTsne = 150)
  saveRDS(LH_Depth_ait_all_coord,file = paste0(savingdir,'/','LH_Depth_ait_all_coord'))
}
LH_Depth_ait_all_coord  = readRDS(file = paste0(savingdir,'/','LH_Depth_ait_all_coord'))

LH_Depth_ait_all_coord_plots <- plot_dimReduc_coords(LH_Depth_ait_all_coord)

LH_Depth_ait_all_coord_plots$MDS_2d
LH_Depth_ait_all_coord_plots$nMDS_2d
LH_Depth_ait_all_coord_plots$TSNE_2d
LH_Depth_ait_all_coord_plots$umap_2d
Run 0 stress 0.08528086 
Run 1 stress 0.09062615 
Run 2 stress 0.09137155 
Run 3 stress 0.08906065 
Run 4 stress 0.08833639 
Run 5 stress 0.08865741 
Run 6 stress 0.08944729 
Run 7 stress 0.08944238 
Run 8 stress 0.0890103 
Run 9 stress 0.09019254 
Run 10 stress 0.09101819 
Run 11 stress 0.08887575 
Run 12 stress 0.08926578 
Run 13 stress 0.08878498 
Run 14 stress 0.09129564 
Run 15 stress 0.08951457 
Run 16 stress 0.08989411 
Run 17 stress 0.08921273 
Run 18 stress 0.0885944 
Run 19 stress 0.08986336 
Run 20 stress 0.08904013 
*** Best solution was not repeated -- monoMDS stopping criteria:
    20: no. of iterations >= maxit

#Creating Clusters

Let’s now create and evaluate the clusters using this two ocean slices that we have